import os
import numpy as np
import pandas as pd
from glob import glob
import shutil
# image
import cv2
from skimage.io import imread
# TensorFlow
import tensorflow as tf
from tensorflow.keras import layers, models
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## progressbar
import progressbar
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
## seaborn
import seaborn as sns
## matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Ellipse, Polygon
from matplotlib.font_manager import FontProperties
import matplotlib.colors as mcolors
from matplotlib.colors import LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib import cm
plt.style.use('seaborn-whitegrid')
plt.rcParams['axes.labelsize'] = 14
plt.rcParams['xtick.labelsize'] = 12
plt.rcParams['ytick.labelsize'] = 12
plt.rcParams['text.color'] = 'k'
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

A brain tumor is considered one of the most aggressive diseases, among both children and adults. Brain tumors account for 85 to 90 percent of all primary Central Nervous System (CNS) tumors. Every year, around 11,700 people are diagnosed with a brain tumor. The 5-year survival rate for people with a cancerous brain or CNS tumor is approximately 34 percent for men and 36 percent for women. Brain tumors are classified as: Benign Tumor, Malignant Tumor, Pituitary Tumor, etc. Proper treatment, planning, and accurate diagnostics should be implemented to improve the life expectancy of the patients. The best technique to detect brain tumors is Magnetic Resonance Imaging (MRI). A huge amount of image data is generated through the scans. These images are examined by the radiologist. A manual examination can be error-prone due to the level of complexities involved in brain tumors and their properties. The application of automated classification techniques using Machine Learning (ML) and Artificial Intelligence (AI) has consistently shown higher accuracy than manual classification. Hence, proposing a system performing detection and classification by using Deep Learning algorithms such as Convolutional Neural Networks (CNN), Artificial Neural Networks (ANN), and Transfer Learning (TL) would be helpful to doctors all around the world.
Brain Tumors are complex. There are a lot of abnormalities in the sizes and location of the brain tumor(s). This makes it really difficult for a complete understanding of the nature of the tumor. Also, a professional Neurosurgeon is required for MRI analysis. Oftentimes in developing countries, the lack of skillful doctors and lack of knowledge about tumors makes it really challenging and time-consuming to generate reports from MRI. So an automated system on Cloud can solve this problem.
To detect and classify brain tumors using CNN and TL, as assets of Deep Learning, and to examine the tumor position (segmentation).
The dataset contains 3 folders — yes, no, and pred — which together contain 3060 Brain MRI Images.
| Folder | Description |
|---|---|
| Yes | The folder yes contains 1500 Brain MRI Images that are tumorous |
| No | The folder no contains 1500 Brain MRI Images that are non-tumorous |
def Header(Text, L = 100, C = 'Blue', T = 'White'):
    """Print `Text` highlighted on a colored background, followed by a
    colored '=' rule padding the whole line to width `L`.

    C selects the background (and rule) color, T the text color.
    """
    BACK = {'Black': Back.BLACK, 'Red': Back.RED, 'Green': Back.GREEN,
            'Yellow': Back.YELLOW, 'Blue': Back.BLUE,
            'Magenta': Back.MAGENTA, 'Cyan': Back.CYAN}
    FORE = {'Black': Fore.BLACK, 'Red': Fore.RED, 'Green': Fore.GREEN,
            'Yellow': Fore.YELLOW, 'Blue': Fore.BLUE,
            'Magenta': Fore.MAGENTA, 'Cyan': Fore.CYAN, 'White': Fore.WHITE}
    banner = BACK[C] + FORE[T] + Style.NORMAL + Text + Style.RESET_ALL
    rule = FORE[C] + Style.NORMAL + '=' * (L - len(Text) - 1) + Style.RESET_ALL
    print(banner + ' ' + rule)
def Line(L=100, C = 'Blue'):
    """Print a horizontal rule of `L` '=' characters in color `C`."""
    palette = {'Black': Fore.BLACK, 'Red': Fore.RED, 'Green': Fore.GREEN,
               'Yellow': Fore.YELLOW, 'Blue': Fore.BLUE,
               'Magenta': Fore.MAGENTA, 'Cyan': Fore.CYAN, 'White': Fore.WHITE}
    print(palette[C] + Style.NORMAL + '=' * L + Style.RESET_ALL)
# Root folder of the dataset; expected sub-folders: 'yes', 'no', 'pred'.
Base = 'brain-tumor-detection'
Data = pd.DataFrame({'Path': glob(os.path.join(Base, '*', '*.jpg'))})
# Derive the label (the parent folder name) and the file name portably.
# The original split on '\\', which only works on Windows paths.
Data['Tumor'] = Data['Path'].map(lambda x: os.path.basename(os.path.dirname(x)).title())
Data['File'] = Data['Path'].map(os.path.basename)
#
# Images under 'pred' are unlabelled: keep them apart for inference only.
Data_Pred = Data.loc[Data['Tumor'] == 'Pred'].drop(columns = ['Tumor'])
Data = Data.loc[Data['Tumor'] != 'Pred']
#
Header('A Sample of Train Dataframe')
Target = 'Tumor'
Data = Data.reindex(sorted(Data.columns), axis=1)
display(Data.sample(10))
# Styler.hide_index() was removed in pandas 2.0; hide(axis='index') is the
# replacement (available since pandas 1.4).
display(pd.DataFrame({'Number of Instances': [Data.shape[0]],
                      'Number of Attributes': [Data.shape[1]]}).style.hide(axis='index'))
A Sample of Train Dataframe ========================================================================
| File | Path | Tumor | |
|---|---|---|---|
| 637 | no221.jpg | brain-tumor-detection\no\no221.jpg | No |
| 1855 | y1263.jpg | brain-tumor-detection\yes\y1263.jpg | Yes |
| 2694 | y67.jpg | brain-tumor-detection\yes\y67.jpg | Yes |
| 1915 | y1317.jpg | brain-tumor-detection\yes\y1317.jpg | Yes |
| 806 | no374.jpg | brain-tumor-detection\no\no374.jpg | No |
| 1576 | y1011.jpg | brain-tumor-detection\yes\y1011.jpg | Yes |
| 20 | no1015.jpg | brain-tumor-detection\no\no1015.jpg | No |
| 569 | no160.jpg | brain-tumor-detection\no\no160.jpg | No |
| 34 | no1028.jpg | brain-tumor-detection\no\no1028.jpg | No |
| 1230 | no756.jpg | brain-tumor-detection\no\no756.jpg | No |
| Number of Instances | Number of Attributes |
|---|---|
| 3000 | 3 |
def DatasetDist(Table, Target, PD):
    """Show the class distribution of `Table` side by side: a formatted
    table (left) and a donut/pie chart (right).

    Table  : DataFrame with columns [Target, 'Count', 'Percentage'].
    Target : name of the class column, also used as the figure title.
    PD     : dict of presentation options (colors, sizes, title position...).
    """
    fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.02,
                        column_widths=PD['column_widths'],
                        specs=[[{"type": "table"}, {"type": "pie"}]])
    # Right panel: pie/donut chart of the class counts.
    fig.add_trace(go.Pie(labels=Table[Target].values, values=Table['Count'].values,
                         pull=PD['pull'], textfont=dict(size=PD['textfont']),
                         marker=dict(colors=PD['PieColors'],
                                     line=dict(color='black', width=1))),
                  row=1, col=2)
    fig.update_traces(hole=PD['hole'])
    fig.update_layout(height=PD['height'],
                      legend=dict(orientation=PD['legend_orientation']),
                      legend_title_text=PD['legend_title'])
    # Left panel: the same data rendered as a table; percentages are
    # formatted as "%NN.NN" strings ('%%' emits a literal percent sign).
    T = Table.copy()
    T['Percentage'] = T['Percentage'].map(lambda x: '%%%.2f' % x)
    Temp = []
    for i in T.columns:
        Temp.append(T.loc[:, i].values)
    fig.add_trace(go.Table(header=dict(values=list(Table.columns), line_color='darkslategray',
                                       fill_color=PD['TableColors'][0], align=['center', 'center'],
                                       font=dict(color='white', size=12), height=25),
                           columnwidth=PD['tablecolumnwidth'],
                           cells=dict(values=Temp, line_color='darkslategray',
                                      fill=dict(color=[PD['TableColors'][1], PD['TableColors'][1]]),
                                      align=['center', 'center', 'center'], font_size=12, height=20)),
                  1, 1)
    # Bug fix: the closing bold tag was '<b>' instead of '</b>'.
    fig.update_layout(title={'text': '<b>' + Target + '</b>', 'x': PD['title_x'],
                             'y': PD['title_y'], 'xanchor': 'center', 'yanchor': 'top'})
    fig.show()
# Per-class counts and percentages for the target column.
Name = Target
Target_Colors = ['Green', 'OrangeRed']
Table = (Data[Target].value_counts()
         .to_frame('Count')
         .reset_index(drop=False)
         .rename(columns={'index': Name}))
Table['Percentage'] = np.round(100 * (Table['Count'] / Table['Count'].sum()), 2)
# Pull only the last pie slice slightly out for emphasis.
Pull = [0] * (len(Table[Name]) - 1)
Pull.append(.05)
PD = dict(PieColors=Target_Colors, TableColors=['DarkSlateGray', 'GhostWhite'], hole=.4,
          column_widths=[0.5, 0.5], textfont=14, height=400, tablecolumnwidth=[.1, .05, .08],
          pull=Pull, legend_title=Name, legend_orientation='v', title_x=0.5, title_y=0.85)
del Pull
DatasetDist(Table, Target=Target, PD=PD)
# Plot a 4x5 grid of randomly sampled MRI images, each titled with its
# label and colored by class.
fig, ax = plt.subplots(4, 5, figsize=(12, 12))
_ = fig.suptitle('A Sample of Dataset', fontweight='bold', fontsize=18)
Colors = dict(zip(Data['Tumor'].unique(), Target_Colors))
axes = ax.ravel()
sampled = Data.sample(len(axes)).reset_index(drop=True)
for i, row in sampled.iterrows():
    axes[i].imshow(imread(row['Path']), cmap='bone')
    axes[i].set_title('Tumor: %s' % row['Tumor'], fontweight='bold',
                      fontsize=12, color=Colors[row['Tumor']])
    axes[i].axis("off")
    axes[i].set_aspect(1)
fig.tight_layout()
batch_size = 64
Img_Height, Img_Width = 200, 200
# Augmentation pipeline; 20% of the labelled data is held out for validation.
DataGen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255.0,
    brightness_range=[0.3, 0.9],
    samplewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=False,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    fill_mode='nearest',
    zoom_range=0.15,
    validation_split=0.2)
# Options shared by both generator flows.
flow_opts = dict(dataframe=Data, directory=None, color_mode='grayscale',
                 x_col='Path', y_col=Target,
                 target_size=(Img_Height, Img_Width),
                 batch_size=batch_size, class_mode='categorical')
Header('Train Images Data Generator')
train_gen = DataGen.flow_from_dataframe(subset='training', shuffle=True, seed=42, **flow_opts)
Header('Validation Images Data Generator', C='Green')
val_gen = DataGen.flow_from_dataframe(subset='validation', shuffle=False, **flow_opts)
Line()
Train Images Data Generator ======================================================================== Found 2400 validated image filenames belonging to 2 classes. Validation Images Data Generator =================================================================== Found 600 validated image filenames belonging to 2 classes. ====================================================================================================
A multi-layer perceptron (MLP) is a class of feedforward artificial neural network (ANN). At each iteration, the RMSprop algorithm measures the loss, then computes the gradient and updates the model. By the end of this iterative process we reach a better level of agreement between the true and predicted sets, since the error is lower than at the first step.
num_classes = len(Data[Target].unique())
# Simple CNN: four conv/pool stages with doubling filter counts,
# followed by a dense classifier head with dropout.
model = models.Sequential()
model.add(layers.Conv2D(32, (5, 5), activation='relu',
                        input_shape=(Img_Height, Img_Width, 1)))
model.add(layers.MaxPooling2D((2, 2)))
for n_filters in (64, 128, 256):
    model.add(layers.Conv2D(n_filters, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(num_classes, activation="softmax"))
model.summary()
tf.keras.utils.plot_model(model, show_shapes=True, show_dtype=True,
                          show_layer_names=True, expand_nested=False, rankdir='LR')
Model: "sequential" _________________________________________________________________ Layer (type) Output Shape Param # ================================================================= conv2d (Conv2D) (None, 196, 196, 32) 832 _________________________________________________________________ max_pooling2d (MaxPooling2D) (None, 98, 98, 32) 0 _________________________________________________________________ conv2d_1 (Conv2D) (None, 96, 96, 64) 18496 _________________________________________________________________ max_pooling2d_1 (MaxPooling2 (None, 48, 48, 64) 0 _________________________________________________________________ conv2d_2 (Conv2D) (None, 46, 46, 128) 73856 _________________________________________________________________ max_pooling2d_2 (MaxPooling2 (None, 23, 23, 128) 0 _________________________________________________________________ conv2d_3 (Conv2D) (None, 21, 21, 256) 295168 _________________________________________________________________ max_pooling2d_3 (MaxPooling2 (None, 10, 10, 256) 0 _________________________________________________________________ flatten (Flatten) (None, 25600) 0 _________________________________________________________________ dropout (Dropout) (None, 25600) 0 _________________________________________________________________ dense (Dense) (None, 512) 13107712 _________________________________________________________________ dense_1 (Dense) (None, 2) 1026 ================================================================= Total params: 13,497,090 Trainable params: 13,497,090 Non-trainable params: 0 _________________________________________________________________
Compiling and fitting the model
# Number of training epochs.
IT = 51
model.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.001),
              loss="categorical_crossentropy",
              metrics=["accuracy"])
# Training the model. `Model.fit_generator` is deprecated and was removed
# in TF 2.6; `Model.fit` accepts generators directly with identical behavior.
history = model.fit(train_gen, validation_data=val_gen, epochs=IT, verbose=0)
def Search_List(Key, List):
    """Return the elements of `List` that contain the substring `Key`."""
    return [item for item in List if Key in item]
# Map raw Keras history keys to display names.
Metrics_Names = {'loss': 'Loss', 'accuracy': 'Accuracy', 'mae': 'MAE',
                 'mse': 'MSE', 'recall': 'Recall'}
def Table_modify(df, Metrics_Names = Metrics_Names):
    """Rename metric columns to display names, sort the columns
    alphabetically, and prepend an 'Iteration' (epoch index) column."""
    out = df.rename(columns=Metrics_Names)
    out = out.reindex(sorted(out.columns), axis=1)
    out.insert(loc=0, column='Iteration',
               value=np.arange(0, out.shape[0]), allow_duplicates=False)
    return out
# Split the fit history into validation ('val_'-prefixed keys) and train
# metric tables, one column per metric, one row per epoch.
val_keys = Search_List('val_', history.history.keys())
train_keys = list(set(history.history.keys()) - set(val_keys))
Validation_Table = pd.DataFrame(np.array([history.history[k] for k in val_keys]).T,
                                columns=val_keys)
Train_Table = pd.DataFrame(np.array([history.history[k] for k in train_keys]).T,
                           columns=train_keys)
# Drop the 'val_' prefix so both tables share the same column names.
Validation_Table.columns = [c.replace('val_', '') for c in Validation_Table.columns]
Train_Table = Table_modify(Train_Table)
Validation_Table = Table_modify(Validation_Table)
# Train Set Score
score = model.evaluate(train_gen, batch_size=batch_size, verbose=0)
score = pd.DataFrame(score, index=model.metrics_names).T
score.index = ['Train Set Score']
# Validation Set Score
Temp = model.evaluate(val_gen, batch_size=batch_size, verbose=0)
Temp = pd.DataFrame(Temp, index=model.metrics_names).T
Temp.index = ['Validation Set Score']
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
score = pd.concat([score, Temp])
score.rename(columns=Metrics_Names, inplace=True)
score = score.reindex(sorted(score.columns), axis=1)
# Styler.set_precision was removed in pandas 2.0; format(precision=...)
# is the replacement.
display(score.style.format(precision=4))
| Accuracy | Loss | |
|---|---|---|
| Train Set Score | 0.9608 | 0.1081 |
| Validation Set Score | 0.9667 | 0.1107 |
def Plot_history(history, PD, Title = False, metrics_names = [x.title() for x in model.metrics_names]):
    """Plot training-history curves (left) next to a subsampled metrics
    table (right).

    history : DataFrame with an 'Iteration' column plus one column per metric.
    PD      : presentation options ('yLim', 'Table_Rows', 'TableColors',
              'tablecolumnwidth').
    Title   : optional figure title; False suppresses it.

    NOTE(review): the metrics_names default is evaluated once, at function
    definition time, from the global `model` — it will not track metrics
    added to the model afterwards.
    """
    fig = make_subplots(rows=1, cols=2, horizontal_spacing=0.02, column_widths=[0.6, 0.4],
                        specs=[[{"type": "scatter"}, {"type": "table"}]])
    # Left panel: one line per metric across iterations.
    Colors = ['OrangeRed', 'MidnightBlue', 'purple']
    for j in range(len(metrics_names)):
        fig.add_trace(go.Scatter(x=history['Iteration'].values, y=history[metrics_names[j]].values,
                                 line=dict(color=Colors[j], width=1.5), name=metrics_names[j]), 1, 1)
    fig.update_layout(legend=dict(x=0, y=1.1, traceorder='reversed', font_size=12),
                      dragmode='select', plot_bgcolor='white', height=600, hovermode='closest',
                      legend_orientation='h')
    fig.update_xaxes(range=[history.Iteration.min(), history.Iteration.max()],
                     showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    fig.update_yaxes(range=[0, PD['yLim']], showgrid=True, gridwidth=1, gridcolor='Lightgray',
                     showline=True, linewidth=1, linecolor='Lightgray', mirror=True, row=1, col=1)
    # Right panel: subsample the history rows (keeping the last one) so
    # the table stays readable.
    if PD['Table_Rows'] is not None:  # idiom fix: was `not ... == None`
        ind = np.linspace(0, history.shape[0], PD['Table_Rows'], endpoint=False).round(0).astype(int)
        ind = np.append(ind, history.index[-1])
        history = history[history.index.isin(ind)]
    T = history.copy()
    # Render metric values in scientific notation for compact display.
    T[metrics_names] = T[metrics_names].applymap(lambda x: '%.4e' % x)
    Temp = []
    for i in T.columns:
        Temp.append(T.loc[:, i].values)
    TableColors = PD['TableColors']
    fig.add_trace(go.Table(header=dict(values=list(history.columns), line_color=TableColors[0],
                                       fill_color=TableColors[0], align=['center', 'center'],
                                       font=dict(color=TableColors[1], size=12), height=25),
                           columnwidth=PD['tablecolumnwidth'],
                           cells=dict(values=Temp, line_color=TableColors[0],
                                      fill=dict(color=[TableColors[1], TableColors[1]]),
                                      align=['center', 'center'], font_size=12, height=20)), 1, 2)
    if Title != False:
        fig.update_layout(plot_bgcolor='white',
                          title={'text': Title, 'x': 0.46, 'y': 0.94, 'xanchor': 'center', 'yanchor': 'top'},
                          yaxis_title='Frequency')
    fig.show()
# Presentation options shared by the train and validation history plots.
PD = {'Table_Rows': 25, 'yLim': 2,
      'tablecolumnwidth': [0.3, 0.4, 0.4], 'TableColors': ['Navy', 'White']}
Plot_history(Train_Table, Title='Train Set', PD=PD)
Plot_history(Validation_Table, Title='Validation Set', PD=PD)